{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import re\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "\n",
    "# Load all files from a directory in a DataFrame.\n",
    "def load_directory_data(directory):\n",
    "    data = {}\n",
    "    data[\"sentence\"] = []\n",
    "    data[\"sentiment\"] = []\n",
    "    for file_path in os.listdir(directory):\n",
    "        with tf.gfile.GFile(os.path.join(directory, file_path), \"r\") as f:\n",
    "            data[\"sentence\"].append(f.read())\n",
    "            data[\"sentiment\"].append(\n",
    "                re.match(\"\\d+_(\\d+)\\.txt\", file_path).group(1))\n",
    "    return pd.DataFrame.from_dict(data)\n",
    "\n",
    "\n",
    "# Merge positive and negative examples, add a polarity column and shuffle.\n",
    "def load_dataset(directory):\n",
    "    pos_df = load_directory_data(os.path.join(directory, \"pos\"))\n",
    "    neg_df = load_directory_data(os.path.join(directory, \"neg\"))\n",
    "    pos_df[\"polarity\"] = 1\n",
    "    neg_df[\"polarity\"] = 0\n",
    "    return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)\n",
    "\n",
    "\n",
    "# Download and process the dataset files.\n",
    "def download_and_load_datasets(force_download=False):\n",
    "    dataset = tf.keras.utils.get_file(\n",
    "        fname=\"aclImdb.tar.gz\",\n",
    "        origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
    "        cache_subdir=r'F:\\testDemo\\AI\\estimator\\data\\text',\n",
    "        extract=True)\n",
    "    print(dataset)\n",
    "    print(os.path.dirname(dataset))\n",
    "\n",
    "    train_df = load_dataset(\n",
    "        os.path.join(os.path.dirname(dataset), \"aclImdb\", \"train\"))\n",
    "    test_df = load_dataset(\n",
    "        os.path.join(os.path.dirname(dataset), \"aclImdb\", \"test\"))\n",
    "\n",
    "    return train_df, test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F:\\testDemo\\AI\\estimator\\data\\text\\aclImdb.tar.gz\n",
      "F:\\testDemo\\AI\\estimator\\data\\text\n"
     ]
    }
   ],
   "source": [
    "train, test = download_and_load_datasets()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 25000 entries, 0 to 24999\n",
      "Data columns (total 3 columns):\n",
      "sentence     25000 non-null object\n",
      "sentiment    25000 non-null object\n",
      "polarity     25000 non-null int64\n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 586.0+ KB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 25000 entries, 0 to 24999\n",
      "Data columns (total 3 columns):\n",
      "sentence     25000 non-null object\n",
      "sentiment    25000 non-null object\n",
      "polarity     25000 non-null int64\n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 586.0+ KB\n"
     ]
    }
   ],
   "source": [
    "test.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "train=train\n",
    "test=test.sample(5000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 25000 entries, 0 to 24999\n",
      "Data columns (total 3 columns):\n",
      "sentence     25000 non-null object\n",
      "sentiment    25000 non-null object\n",
      "polarity     25000 non-null int64\n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 586.0+ KB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentence</th>\n",
       "      <th>sentiment</th>\n",
       "      <th>polarity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>This is really a very bad movie. Why? First of...</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>I can hardly believe that this inert, turgid a...</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>I first saw this movie when it originally came...</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Superb cast, more please!&lt;br /&gt;&lt;br /&gt;If you ca...</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>You can debate Prince's acting talent, or even...</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            sentence sentiment  polarity\n",
       "0  This is really a very bad movie. Why? First of...         3         0\n",
       "1  I can hardly believe that this inert, turgid a...         4         0\n",
       "2  I first saw this movie when it originally came...        10         1\n",
       "3  Superb cast, more please!<br /><br />If you ca...        10         1\n",
       "4  You can debate Prince's acting talent, or even...         8         1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "MAX_SEQ_LENGTH=128 # 可根据自己实际 设定\n",
    "embedding_dim=50\n",
    "dropout=0.5\n",
    "vocab_size=89528 # 根据自己实际情况设置\n",
    "BATCH_SIZE=128\n",
    "EPOCH=5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 接下来 还是 参照我之前的 estimator template"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_model(features,vocab_size,embedding_dim,drop_out,output_cls,max_seq_length):\n",
    "    # 定义网络结构 和  返回值\n",
    "    if isinstance(features,dict):\n",
    "        features=features['text']\n",
    "#     tf.logging.info('call create_model')\n",
    "#     inputlayer=tf.feature_column.input_layer(features,feature_columns)\n",
    "    # inputlayer 形状是[batch_size,MAX_SEQ_LENGTH]\n",
    "#     tf.logging.info('input layer shape si :'+str(inputlayer.shape))\n",
    "#     inputlayer=tf.cast(inputlayer,tf.int64)\n",
    "    table=tf.contrib.lookup.index_table_from_file(vocabulary_file='data/text/aclImdb/imdb.vocab',default_value=0)\n",
    "    tokens=table.lookup(features)\n",
    "    \n",
    "    \n",
    "    # embedding layer,如果使用 pre-trained matrix，请参考 https://github.com/AlbertBJ/ChineseNER-Based-DL/blob/master/Model.py\n",
    "    embedding=tf.get_variable('embedding',[vocab_size,embedding_dim],dtype=tf.float32)\n",
    "    lstm_input=tf.nn.embedding_lookup(embedding,tokens,name='embedding_layer')\n",
    "    # lstm_input shape is [batch_size,max-seq_length,embedding_dim]\n",
    "    \n",
    "    # 使用bi-lstm\n",
    "#     fw=tf.contrib.rnn.LSTMBlockCell(num_units=lstm_input,name='fw')\n",
    "    # 此处由于 分类使用，故而 设置 return_sequences=False,只返回 最后一个单元的 输出\n",
    "    lstm=tf.keras.layers.LSTM(units=embedding_dim,dropout=drop_out,return_sequences=False)\n",
    "    bi_lstm=tf.keras.layers.Bidirectional(lstm,merge_mode='concat',name='bi_lstm')\n",
    "    dense_input=bi_lstm(lstm_input)\n",
    "    # dense_input shape is [batch_size,2*embedding_dim]\n",
    "    dense_1=tf.keras.layers.Dense(units=embedding_dim,activation='relu',name='dense_1')(dense_input)\n",
    "    dropout_1=tf.keras.layers.Dropout(drop_out,name='drop_out_1')(dense_1)\n",
    "    logits=tf.keras.layers.Dense(units=output_cls,name='out_put')(dropout_1)\n",
    "    return logits "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def  model_fn_builder(lr,threshold):\n",
    "    # 该方法实际 创建 estimator的model_fn\n",
    "    # 可以 有其他操作\n",
    "    def model_fn(features, labels, mode, params,config): # estimator需要的model_fn参数固定\n",
    "        '''\n",
    "        features: from input_fn的返回  切记返回的顺序\n",
    "        labels： from input_fn 的返回  切记返回的顺序\n",
    "        mode: tf.estimator.ModeKeys实例的一种\n",
    "        params: 在初始化estimator时 传入的参数列表，dict形式,或者直接使用self.params也可以\n",
    "        config:初始化estimator时 的 Runconfig\n",
    "\n",
    "        '''\n",
    "        if not (mode==tf.estimator.ModeKeys.PREDICT):\n",
    "            labels=tf.reshape(labels,[-1,1])\n",
    "            tf.logging.info('labels shape:'+ str(labels.shape))\n",
    "        logits=create_model(features,params['vocab_size'],params['embedding_dim'],params['drop_out'],params['output_cls'],params['max_seq_length'])\n",
    "\n",
    "        tf.logging.info('logits shape:'+ str(logits.shape))\n",
    "        \n",
    "        \n",
    "        #         pre_cls=tf.math.argmax(input=logits,axis=1)\n",
    "        pre_prob=tf.nn.sigmoid(logits) # 在此处 由于 是 二分类，此值 是 y=1的概率\n",
    "        tf.logging.info('pre_prob shape:'+ str(pre_prob.shape))\n",
    "        # 根据 自己需要调整 阈值 threshold\n",
    "        pre_cls=tf.where(pre_prob<threshold,tf.zeros_like(pre_prob),tf.ones_like(pre_prob))\n",
    "        \n",
    "        is_predict=mode==tf.estimator.ModeKeys.PREDICT\n",
    "        if not is_predict:\n",
    "            # train .eval\n",
    "            loss=tf.losses.sigmoid_cross_entropy(labels,logits=logits)\n",
    "\n",
    "            def metric_fn(labels,predictions):\n",
    "                '''\n",
    "                define metrics\n",
    "                '''\n",
    "                accuracy,accuracy_update=tf.metrics.accuracy(labels=labels,predictions=predictions,name='text_accuracy')\n",
    "                recall,recall_update=tf.metrics.recall(labels=labels,predictions=predictions,name='text_recall')\n",
    "                precision,precision_update=tf.metrics.precision(labels=labels,predictions=predictions,name='text_precision')\n",
    "\n",
    "\n",
    "                return {\n",
    "                    'accuracy':(accuracy,accuracy_update),\n",
    "                    'recall':(recall,recall_update),\n",
    "                    'precision':(precision,precision_update)                  \n",
    "                }\n",
    "\n",
    "\n",
    "            if mode==tf.estimator.ModeKeys.EVAL:\n",
    "                return tf.estimator.EstimatorSpec(mode=mode,loss=loss,eval_metric_ops=metric_fn(labels,pre_cls))\n",
    "\n",
    "            # train process\n",
    "            train_op=tf.train.AdamOptimizer(learning_rate=lr).minimize(loss=loss,global_step=tf.train.get_global_step())\n",
    "            return tf.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op,eval_metric_ops=metric_fn(labels,pre_cls))\n",
    "\n",
    "\n",
    "        else:\n",
    "            # 此处转换只针对 二分类\n",
    "            neg=tf.identity(1-pre_prob)\n",
    "            pro=tf.where(pre_prob<threshold,neg,pre_prob)\n",
    "            \n",
    "            predictions={'predict_cls':pre_cls,'predict_pro':pro}\n",
    "            return tf.estimator.EstimatorSpec(mode=mode,predictions=predictions)     \n",
    "    return model_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "def input_fn_builder(x,y,batch_size,epochs,max_seq_length,is_train=True):\n",
    "    '''\n",
    "    创建 输入函数闭包\n",
    "    \n",
    "    '''\n",
    "    def pad_or_trunc(t):\n",
    "        dim = tf.size(t)\n",
    "        return tf.cond(tf.equal(dim, max_seq_length), lambda: t, lambda: tf.cond(tf.greater(dim, max_seq_length), lambda: tf.slice(t, [0], [max_seq_length]), lambda: tf.concat([t, tf.fill([max_seq_length-dim],'UNK')], 0)))\n",
    "    \n",
    "    \n",
    "    def token(d,y):\n",
    "        \n",
    "\n",
    "        words=tf.string_split([d['text']])\n",
    "        tokens=tf.sparse_tensor_to_dense(words,default_value='UNK')\n",
    "        tf.logging.info('token 0 is type:'+str(type(tokens)))\n",
    "\n",
    "        pad=pad_or_trunc(tf.reshape(tokens,[-1]))\n",
    "        \n",
    "        tf.logging.info('pad shape is :'+str(type(pad)))\n",
    "        return {'text':pad},y    \n",
    "    def input_fn():\n",
    "        tf.logging.info('call input_fn')\n",
    "        dataset=tf.data.Dataset.from_tensor_slices(({'text':x},y) )  \n",
    "        if is_train:\n",
    "            dataset=dataset.shuffle(1000).repeat(epochs)\n",
    "        dataset=dataset.map(token)\n",
    "        dataset=dataset.batch(batch_size)\n",
    "        return dataset # 返回的 顺序要和 model_fn一致 或者 dataset元素 格式为（features,label）元组 也可以\n",
    "    return input_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "#对于 中文来说，目前 如果要使用\n",
    "# def preprocess_func(x):\n",
    "#     ret= \"*\".join(x.decode('utf-8'))\n",
    "#     return ret\n",
    "\n",
    "# str_t = tf.py_func(\n",
    "#         preprocess_func,\n",
    "#         [tf.constant(strs)],\n",
    "#         tf.string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "def serving_input_receiver_fn():\n",
    "    def pad_or_trunc(t):\n",
    "        dim = tf.size(t)\n",
    "        return tf.cond(tf.equal(dim, MAX_SEQ_LENGTH), lambda: t, lambda: tf.cond(tf.greater(dim, MAX_SEQ_LENGTH), lambda: tf.slice(t, [0], [MAX_SEQ_LENGTH]), lambda: tf.concat([t, tf.fill([MAX_SEQ_LENGTH-dim],'UNK')], 0)))\n",
    " \n",
    "    \n",
    "    def process(text):\n",
    "        \"\"\"split ,pad and truncate\"\"\"\n",
    "        words=tf.string_split([text])\n",
    "        tokens=tf.sparse_tensor_to_dense(words,default_value='UNK')\n",
    "        padded=pad_or_trunc(tf.reshape(tokens,[-1]))       \n",
    "        return padded\n",
    "\n",
    "     # Optional; currently necessary for batch prediction.\n",
    "    key_input = tf.placeholder(tf.string, shape=[None]) \n",
    "    key_output = tf.identity(key_input)\n",
    "\n",
    "    input_ph = tf.placeholder(tf.string, shape=[None], name='texts')\n",
    "    text_tensor = tf.map_fn(\n",
    "      process, input_ph, back_prop=False, dtype=tf.string)\n",
    "    receiver_tensors = {'texts': input_ph}\n",
    "   \n",
    "    features = {\n",
    "       'text': text_tensor\n",
    "    }    \n",
    "    \n",
    "  \n",
    "    return tf.estimator.export.ServingInputReceiver(features,receiver_tensors)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_dir=r'F:\\testDemo\\AI\\estimator\\model\\text'\n",
    "params={}\n",
    "\n",
    "output_cls=1\n",
    "# params['feature_columns']=feature_columns\n",
    "params['output_cls']=output_cls\n",
    "params['vocab_size']=vocab_size\n",
    "params['embedding_dim']=embedding_dim\n",
    "params['drop_out']=dropout\n",
    "params['max_seq_length']=MAX_SEQ_LENGTH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using config: {'_model_dir': 'F:\\\\testDemo\\\\AI\\\\estimator\\\\model\\\\text', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 100, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true\n",
      "graph_options {\n",
      "  rewrite_options {\n",
      "    meta_optimizer_iterations: ONE\n",
      "  }\n",
      "}\n",
      ", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001776DC0ADA0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
     ]
    }
   ],
   "source": [
    "config=tf.estimator.RunConfig(save_checkpoints_steps=100)\n",
    "\n",
    "estimator=tf.estimator.Estimator(model_fn=model_fn_builder(0.001,0.5),model_dir=model_dir,params=params,config=config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "如果需要加入 early_stop  则可以使用 train_and_evaluate,然后指定TrainSpec，EvalSpec中的hooks就可以了，具体可以参考官网"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:call input_fn\n",
      "INFO:tensorflow:token 0 is type:<class 'tensorflow.python.framework.ops.Tensor'>\n",
      "INFO:tensorflow:pad shape is :<class 'tensorflow.python.framework.ops.Tensor'>\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:labels shape:(?, 1)\n",
      "INFO:tensorflow:call create_model\n",
      "INFO:tensorflow:logits shape:(?, 1)\n",
      "INFO:tensorflow:pre_prob shape:(?, 1)\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt-1368\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 1368 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:loss = 1.5149491, step = 1369\n",
      "INFO:tensorflow:Saving checkpoints for 1468 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.3262\n",
      "INFO:tensorflow:loss = 0.40239045, step = 1469 (30.067 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1568 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.40725\n",
      "INFO:tensorflow:loss = 0.17320392, step = 1569 (29.349 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1668 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.30519\n",
      "INFO:tensorflow:loss = 0.2907487, step = 1669 (30.255 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1768 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.27128\n",
      "INFO:tensorflow:loss = 0.1325854, step = 1769 (30.569 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1868 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.51867\n",
      "INFO:tensorflow:loss = 0.16754751, step = 1869 (28.420 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1968 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.41493\n",
      "INFO:tensorflow:loss = 0.094051816, step = 1969 (29.284 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 2068 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.25428\n",
      "INFO:tensorflow:loss = 0.11071127, step = 2069 (30.728 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 2168 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.46903\n",
      "INFO:tensorflow:loss = 0.06222876, step = 2169 (28.827 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 2268 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:global_step/sec: 3.46984\n",
      "INFO:tensorflow:loss = 0.039177723, step = 2269 (28.820 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 2345 into F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.12086187.\n"
     ]
    }
   ],
   "source": [
    "train_result=estimator.train(input_fn=input_fn_builder(x=train['sentence'],y=train['polarity'],batch_size=BATCH_SIZE,epochs=EPOCH,max_seq_length=MAX_SEQ_LENGTH,is_train=True),steps=10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:call input_fn\n",
      "INFO:tensorflow:token 0 is type:<class 'tensorflow.python.framework.ops.Tensor'>\n",
      "INFO:tensorflow:pad shape is :<class 'tensorflow.python.framework.ops.Tensor'>\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:labels shape:(?, 1)\n",
      "INFO:tensorflow:call create_model\n",
      "INFO:tensorflow:logits shape:(?, 1)\n",
      "INFO:tensorflow:pre_prob shape:(?, 1)\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Starting evaluation at 2019-06-19T06:43:58Z\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt-2345\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Finished evaluation at 2019-06-19-06:44:01\n",
      "INFO:tensorflow:Saving dict for global step 2345: accuracy = 0.7706, global_step = 2345, loss = 0.9025137, precision = 0.7454058, recall = 0.80156446\n",
      "INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2345: F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt-2345\n"
     ]
    }
   ],
   "source": [
    "eval_result=estimator.evaluate(input_fn=input_fn_builder(x=test['sentence'],y=test['polarity'],batch_size=BATCH_SIZE,epochs=EPOCH,max_seq_length=MAX_SEQ_LENGTH,is_train=False),steps=10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:call create_model\n",
      "INFO:tensorflow:logits shape:(?, 1)\n",
      "INFO:tensorflow:pre_prob shape:(?, 1)\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Signatures INCLUDED in export for Classify: None\n",
      "INFO:tensorflow:Signatures INCLUDED in export for Regress: None\n",
      "INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']\n",
      "INFO:tensorflow:Signatures INCLUDED in export for Train: None\n",
      "INFO:tensorflow:Signatures INCLUDED in export for Eval: None\n",
      "INFO:tensorflow:Restoring parameters from F:\\testDemo\\AI\\estimator\\model\\text\\model.ckpt-2345\n",
      "INFO:tensorflow:Assets added to graph.\n",
      "INFO:tensorflow:Assets written to: export_base/text\\temp-b'1561009656'\\assets\n",
      "INFO:tensorflow:SavedModel written to: export_base/text\\temp-b'1561009656'\\saved_model.pb\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "b'export_base/text\\\\1561009656'"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "estimator.export_savedmodel('export_base/text',serving_input_receiver_fn=serving_input_receiver_fn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## post body:<br>\n",
    "{\"instances\" : [{\"texts\":\"I went and saw this movie last night after being coaxed to by a few friends of mine. I'll admit that I was reluctant to see it because from what I knew of Ashton Kutcher he was only able to do comedy. I was wrong. Kutcher played the character of Jake Fischer very well, and Kevin Costner played Ben Randall with such professionalism. The sign of a good movie is that it can toy with our emotions. This one did exactly that. The entire theater (which was sold out) was overcome by laughter during the first half of the movie, and were moved to tears during the second half. While exiting the theater I not only saw many women in tears, but many full grown men as well, trying desperately not to let anyone see them crying. This movie was great, and I suggest that you go see it before you judge.\"},\n",
    "{\"texts\":\"How many movies are there that you can think of when you see a movie like this? I can't count them but it sure seemed like the movie makers were trying to give me a hint. I was reminded so often of other movies, it became a big distraction. One of the borrowed memorable lines came from a movie from 2003 - Day After Tomorrow. One line by itself, is not so bad but this movie borrows so much from so many movies it becomes a bad risk.BUT...See The Movie! Despite its downfalls there is enough to make it interesting and maybe make it appear clever. While borrowing so much from other movies it never goes overboard. In fact, you'll probably find yourself battening down the hatches and riding the storm out. Why? ...Costner and Kutcher played their characters very well. I have never been a fan of Kutcher's and I nearly gave up on him in The Guardian, but he surfaced in good fashion. Costner carries the movie swimmingly with the best of Costner's ability. I don't think Mrs. Robinson had anything to do with his success.The supporting cast all around played their parts well. I had no problem with any of them in the end. But some of these characters were used too much From here on out I can only nit-pick so I will save you the wear and tear. Enjoy the movie, the parts that work, work well enough to keep your head above water. Just don't expect a smooth ride.7 of 10 but almost a 6.\"},\n",
    "{\"texts\":\"I attended an advance screening of this film not sure of what to expect from Kevin Costner and Ashton Kutcher; both have delivered less than memorable performances & films. While the underlying general storyline is somewhat familiar, this film was excellent. Both Costner and Kutcher delivered powerful performances playing extremely well off each other. The human frailties and strengths of their respective characters were incredibly played by both; the scene when Costner confronts Kutcher with the personal reasons why Kutcher joined the Coast Guard rescue elite was the film's most unforgettable emotional moment. The specific storyline was an education in itself depicting the personal sacrifice and demanding physical training the elite Coast Guard rescuers must go through in preparation of their only job & responsibility...to save lives at sea. The special effects of the rescue scenes were extremely realistic and wowing...I haven't seen such angry seas since The Perfect Storm. Co-star Clancy Brown (HBO's Carnivale - great to see him again) played the captain of the Coast Guard's Kodiak, Alaska base in a strong, convincing role as a leader with the prerequisite and necessary ice water in his veins. The film wonderfully, and finally, gives long overdue exposure and respect to the Coast Guard; it had the audience applauding at the end.\"}\n",
    "]}<br>\n",
    "## 格式：<br>\n",
    "{instances:[{\"text\":\"sentence\"},{\"text\":\"sentence\"},...,{\"text\":\"sentence\"}]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## return format:\n",
    "\n",
    "{\n",
    "    \"predictions\": [\n",
    "        {\n",
    "            \"predict_pro\": [\n",
    "                0.815035\n",
    "            ],\n",
    "            \"predict_cls\": [\n",
    "                0\n",
    "            ]\n",
    "        },\n",
    "        {\n",
    "            \"predict_pro\": [\n",
    "                0.591119\n",
    "            ],\n",
    "            \"predict_cls\": [\n",
    "                0\n",
    "            ]\n",
    "        },\n",
    "        {\n",
    "            \"predict_pro\": [\n",
    "                0.994651\n",
    "            ],\n",
    "            \"predict_cls\": [\n",
    "                1\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    "}<br>\n",
    "\n",
    "foramt:<br>\n",
    "{\"predictions\":[{\"predict_pro:[pro_value],\"predict_cls\":[cls_value]},...,{\"predict_pro:[pro_value],\"predict_cls\":[cls_value]}]}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py3_NER",
   "language": "python",
   "name": "ner"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
